{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This notebook demonstrates how we can build a working neural net using NumPy library only"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 1. Initialize"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-05-18T21:14:27.374378Z",
     "start_time": "2019-05-18T21:14:27.365566Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np \n",
    "np.random.seed(42) # for reproducibility"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 2. Generate data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-05-18T21:14:27.400704Z",
     "start_time": "2019-05-18T21:14:27.377332Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[37.45401188, 95.07143064, 73.19939418, ..., 94.6707915 ,\n",
       "        39.74879924, 21.7140404 ],\n",
       "       [37.36408185, 33.29120962, 17.61539125, ..., 30.36984691,\n",
       "        44.33200065, 17.22648144]])"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_num_row, X_num_col = [2, 10000] # Row is no. of feature, col is no. of datum points\n",
    "X_raw = np.random.rand(X_num_row,X_num_col) * 100\n",
    "X_raw"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-05-18T21:14:27.411696Z",
     "start_time": "2019-05-18T21:14:27.403128Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(3, 10000)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_raw = np.concatenate(([(X_raw[0,:] + X_raw[1,:])], [(X_raw[0,:] - X_raw[1,:])], np.abs([(X_raw[0,:] - X_raw[1,:])])))\n",
    "# for input a and b, output is a+b; a-b and |a-b|\n",
    "y_num_row, y_num_col = y_raw.shape\n",
    "y_raw.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 3. Split train-test dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-05-18T21:14:27.419829Z",
     "start_time": "2019-05-18T21:14:27.413643Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "train_ratio = 0.7\n",
    "num_train_datum = int(train_ratio*X_num_col)\n",
    "X_raw_train = X_raw[:,0:num_train_datum]\n",
    "X_raw_test = X_raw[:,num_train_datum:]\n",
    "\n",
    "\n",
    "y_raw_train = y_raw[:,0:num_train_datum]\n",
    "y_raw_test = y_raw[:,num_train_datum:]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 4. Standardize data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-05-18T21:14:27.462358Z",
     "start_time": "2019-05-18T21:14:27.422168Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "class scaler:\n",
    "    def __init__(self, mean, std):\n",
    "        self.mean = mean\n",
    "        self.std = std\n",
    "\n",
    "def get_scaler(row):\n",
    "    mean = np.mean(row)\n",
    "    std = np.std(row)\n",
    "    return scaler(mean, std)\n",
    "\n",
    "def standardize(data, scaler):\n",
    "    return (data - scaler.mean) / scaler.std\n",
    "\n",
    "def unstandardize(data, scaler):\n",
    "    return (data * scaler.std) + scaler.mean"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-05-18T21:14:27.472718Z",
     "start_time": "2019-05-18T21:14:27.464724Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Construct scalers from training set\n",
    "\n",
    "X_scalers = [get_scaler(X_raw_train[row,:]) for row in range(X_num_row)]\n",
    "X_train = np.array([standardize(X_raw_train[row,:], X_scalers[row]) for row in range(X_num_row)])\n",
    "\n",
    "y_scalers = [get_scaler(y_raw_train[row,:]) for row in range(y_num_row)]\n",
    "y_train = np.array([standardize(y_raw_train[row,:], y_scalers[row]) for row in range(y_num_row)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-05-18T21:14:27.478689Z",
     "start_time": "2019-05-18T21:14:27.474370Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Apply those scalers to testing set\n",
    "\n",
    "X_test = np.array([standardize(X_raw_test[row,:], X_scalers[row]) for row in range(X_num_row)])\n",
    "\n",
    "y_test = np.array([standardize(y_raw_test[row,:], y_scalers[row]) for row in range(y_num_row)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-05-18T21:14:27.488529Z",
     "start_time": "2019-05-18T21:14:27.480426Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2.740664837931815e-16, 5.227564413092166e-17]\n",
      "[0.9999999999999999, 1.0]\n",
      "[-4.608377171929793e-16, 1.0658141036401503e-17, 6.471014200672341e-18]\n",
      "[0.9999999999999999, 1.0, 1.0]\n"
     ]
    }
   ],
   "source": [
    "# Check if data has been standardized\n",
    "\n",
    "print([X_train[row,:].mean() for row in range(X_num_row)]) # should be close to zero\n",
    "print([X_train[row,:].std() for row in range(X_num_row)])  # should be close to one\n",
    "\n",
    "print([y_train[row,:].mean() for row in range(y_num_row)]) # should be close to zero\n",
    "print([y_train[row,:].std() for row in range(y_num_row)])  # should be close to one"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 5. Construct a neural net"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-05-18T21:14:27.506539Z",
     "start_time": "2019-05-18T21:14:27.491123Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "class layer:\n",
    "    def __init__(self, layer_index, is_output, input_dim, output_dim, activation):\n",
    "        self.layer_index = layer_index # zero indicates input layer\n",
    "        self.is_output = is_output # true indicates output layer, false otherwise\n",
    "        self.input_dim = input_dim\n",
    "        self.output_dim = output_dim\n",
    "        self.activation = activation\n",
    "        \n",
    "        # the multiplication constant is sorta arbitrary\n",
    "        if layer_index != 0:\n",
    "            self.W = np.random.randn(output_dim, input_dim) * np.sqrt(2/input_dim) \n",
    "            self.b = np.random.randn(output_dim, 1) * np.sqrt(2/input_dim)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-05-18T21:14:27.526822Z",
     "start_time": "2019-05-18T21:14:27.508751Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Change layers_dim to configure your own neural net!            \n",
    "layers_dim = [X_num_row, 4, 4, y_num_row] # input layer --- hidden layers --- output layers\n",
    "neural_net = []\n",
    "\n",
    "# Construct the net layer by layer\n",
    "for layer_index in range(len(layers_dim)):\n",
    "    if layer_index == 0: # if input layer\n",
    "        neural_net.append(layer(layer_index, False, 0, layers_dim[layer_index], 'irrelevant'))\n",
    "    elif layer_index+1 == len(layers_dim): # if output layer\n",
    "        neural_net.append(layer(layer_index, True, layers_dim[layer_index-1], layers_dim[layer_index], activation='linear'))\n",
    "    else: \n",
    "        neural_net.append(layer(layer_index, False, layers_dim[layer_index-1], layers_dim[layer_index], activation='relu'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-05-18T21:40:36.862788Z",
     "start_time": "2019-05-18T21:40:36.851594Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Predicted number of hyperparameters: 47\n",
      "Actual number of hyperparameters: 47\n",
      "Number of data: 10000\n"
     ]
    }
   ],
   "source": [
    "# Simple check on overfitting\n",
    "\n",
    "pred_n_param = sum([(layers_dim[layer_index]+1)*layers_dim[layer_index+1] for layer_index in range(len(layers_dim)-1)])\n",
    "act_n_param = sum([neural_net[layer_index].W.size + neural_net[layer_index].b.size for layer_index in range(1,len(layers_dim))])\n",
    "print(f'Predicted number of hyperparameters: {pred_n_param}')\n",
    "print(f'Actual number of hyperparameters: {act_n_param}')\n",
    "print(f'Number of data: {X_num_col}')\n",
    "\n",
    "if act_n_param >= X_num_col:\n",
    "    raise Exception('It will overfit.')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 6. Perform forward propagation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-05-18T21:14:27.564092Z",
     "start_time": "2019-05-18T21:14:27.558739Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def activation(input_, act_func):\n",
    "    if act_func == 'relu':\n",
    "        return np.maximum(input_, np.zeros(input_.shape))\n",
    "    elif act_func == 'linear':\n",
    "        return input_\n",
    "    else:\n",
    "        raise Exception('Activation function is not defined.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-05-18T21:14:27.572809Z",
     "start_time": "2019-05-18T21:14:27.566185Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def forward_prop(input_vec, layers_dim=layers_dim, neural_net=neural_net):\n",
    "    neural_net[0].A = input_vec # Define A in input layer for for-loop convenience\n",
    "    for layer_index in range(1,len(layers_dim)): # W,b,Z,A are undefined in input layer\n",
    "        neural_net[layer_index].Z = np.add(np.dot(neural_net[layer_index].W, neural_net[layer_index-1].A), neural_net[layer_index].b)\n",
    "        neural_net[layer_index].A = activation(neural_net[layer_index].Z, neural_net[layer_index].activation)\n",
    "    return neural_net[layer_index].A"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-05-18T21:14:27.812885Z",
     "start_time": "2019-05-18T21:14:27.574665Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# test run\n",
    "\n",
    "forward_prop(X_train).shape == y_train.shape # should be True"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 7. Perform back propagation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-05-18T21:14:27.843912Z",
     "start_time": "2019-05-18T21:14:27.815170Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def get_loss(y, y_hat, metric='mse'):\n",
    "    if metric == 'mse':\n",
    "        individual_loss = 0.5 * (y_hat - y) ** 2\n",
    "        return np.mean([np.linalg.norm(individual_loss[:,col], 2) for col in range(individual_loss.shape[1])])\n",
    "    else:\n",
    "        raise Exception('Loss metric is not defined.')\n",
    "\n",
    "def get_dZ_from_loss(y, y_hat, metric):\n",
    "    if metric == 'mse':\n",
    "        return y_hat - y\n",
    "    else:\n",
    "        raise Exception('Loss metric is not defined.')\n",
    "\n",
    "def get_dactivation(A, act_func):\n",
    "    if act_func == 'relu':\n",
    "        return np.maximum(np.sign(A), np.zeros(A.shape)) # 1 if backward input >0, 0 otherwise; then diaganolize\n",
    "    elif act_func == 'linear':\n",
    "        return np.ones(A.shape)\n",
    "    else:\n",
    "        raise Exception('Activation function is not defined.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-05-18T21:14:27.870828Z",
     "start_time": "2019-05-18T21:14:27.847510Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def backward_prop(y, y_hat, metric='mse', layers_dim=layers_dim, neural_net=neural_net, num_train_datum=num_train_datum):\n",
    "    for layer_index in range(len(layers_dim)-1,0,-1):\n",
    "        if layer_index+1 == len(layers_dim): # if output layer\n",
    "            dZ = get_dZ_from_loss(y, y_hat, metric)\n",
    "        else: \n",
    "            dZ = np.multiply(np.dot(neural_net[layer_index+1].W.T, dZ), \n",
    "                             get_dactivation(neural_net[layer_index].A, neural_net[layer_index].activation))\n",
    "        dW = np.dot(dZ, neural_net[layer_index-1].A.T) / num_train_datum\n",
    "        db = np.sum(dZ, axis=1, keepdims=True) / num_train_datum\n",
    "        \n",
    "        neural_net[layer_index].dW = dW\n",
    "        neural_net[layer_index].db = db"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 8. Optimize Iteratively (Gradient Descent)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-05-18T21:31:57.379056Z",
     "start_time": "2019-05-18T21:14:27.873523Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.3502\n",
      "0.3450\n",
      "0.3450\n",
      "0.3450\n",
      "0.3450\n",
      "0.3450\n",
      "0.3450\n",
      "0.3450\n",
      "0.3450\n",
      "0.3450\n"
     ]
    }
   ],
   "source": [
    "learning_rate = 0.01\n",
    "max_epoch = 1000000\n",
    "\n",
    "for epoch in range(1,max_epoch+1):\n",
    "    y_hat_train = forward_prop(X_train) # update y_hat\n",
    "    backward_prop(y_train, y_hat_train) # update (dW,db)\n",
    "    \n",
    "    for layer_index in range(1,len(layers_dim)):        # update (W,b)\n",
    "        neural_net[layer_index].W = neural_net[layer_index].W - learning_rate * neural_net[layer_index].dW\n",
    "        neural_net[layer_index].b = neural_net[layer_index].b - learning_rate * neural_net[layer_index].db\n",
    "    \n",
    "    if epoch % 100000 == 0:\n",
    "        print(f'{get_loss(y_train, y_hat_train):.4f}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 9. Test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-05-18T21:31:57.414111Z",
     "start_time": "2019-05-18T21:31:57.382068Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.3194238845483295"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# test loss\n",
    "\n",
    "get_loss(y_test, forward_prop(X_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-05-18T21:31:57.427484Z",
     "start_time": "2019-05-18T21:31:57.416673Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def predict(X_raw_any):\n",
    "    X_any = np.array([standardize(X_raw_any[row,:], X_scalers[row]) for row in range(X_num_row)])\n",
    "    y_hat = forward_prop(X_any)\n",
    "    y_hat_any = np.array([unstandardize(y_hat[row,:], y_scalers[row]) for row in range(y_num_row)])\n",
    "    return y_hat_any"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-05-18T21:31:57.444148Z",
     "start_time": "2019-05-18T21:31:57.430541Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 98.56378114, 102.63221611,   5.56349031, 562.15335048],\n",
       "       [-26.25088759,  14.80155747,  21.3245024 , 221.60597885],\n",
       "       [ 48.08520947,  25.18205121,  15.71597463, -64.01423955]])"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "predict(np.array([[30,70],[70,30],[3,5],[888,122]]).T)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
